###################################
# Fixed effects models
###################################
#by Katie Devenish katie.devenish@manchester.ac.uk


library("dplyr")
library("plm")
library("gplots")
library("ggplot2")
library("tidyr")
library("fixest")
library("car")
library("lmtest")

x <- read.csv()

# Loop to read in files from directory. Assign function allows you to assign name based on original filename to the read-in data.
files <- list.files()

for(i in 1:length(files)){
  x <- read.csv(paste0("SUPA_Input/", files[i]))
  assign(paste0(gsub("\\SUPA_|.csv", "", files[i])), x)
}


# x is the input dataframe. Name is the name of the comparison e.g. SUPA_nonpa
# Use func1 for all comparisons except with mining and nonpa

options(scipen = 0)

func1 <- function(x, PA_cat, control){
  x <- subset(x, select = -c(deforest.10, deforest.00)) # Remove unwanted defor columns
  
  # Reformat data to panel format 
  
  x <- x %>% slice(rep(1:n(), each = 2))                # Duplicate each observation            
  x$Year <- rep(c("2000", "2010"), length(unique(x$CU_ID_50M)))      # Add column for Year.
  x$perc_for <- 0                                       # Add column for each predictor which the transposed data will go into.
  x$income <- 0
  x$literacy <- 0
  x$sanitation <- 0
  x$gini <- 0
  x$treatment2 <- 0
  x$population <- 0
  x$mines <- 0
  index <- which(x$Year == "2000")
  
  # For perc_for
  
  for(i in index){
    x$perc_for[i:nrow(x)] <- as.numeric(t(x[i, c("forest.00_alternative", "forest.10_alternative")]))
  }
  
  # For income
  
  for(i in index){
    x$income[i:nrow(x)] <- as.numeric(t(x[i, c("income_infl.00", "income.10")]))
  }
  
  # For literacy
  
  for(i in index){
    x$literacy[i:nrow(x)] <- as.numeric(t(x[i, c("literacy.00", "literacy.10")]))
  }
  
  # For sanitation
  
  for(i in index){
    x$sanitation[i:nrow(x)] <- as.numeric(t(x[i, c("sanitation.00", "sanitation.10")]))
  }
  
  # For gini
  
  for(i in index){
    x$gini[i:nrow(x)] <- as.numeric(t(x[i, c("gini.00", "gini.10")]))
  }
  
  # For treatment
  
  x$treatment2[x$treatment == 1 & x$Year == "2010"] <- 1 
  
  # Check it is correct
  
  print(which(x$treatment == 0 & x$treatment2 == 1))
  print(which(x$treatment == 1 & x$Year== "2000" & x$treatment2 ==1))
  
  # For population
  
  for(i in index){
    x$population[i:nrow(x)] <- as.numeric(t(x[i, c("population.00", "population.10")]))
  }
  
  # For mines
  
  for(i in index){
    x$mines[i:nrow(x)] <- as.numeric(t(x[i, c("mines.00", "mines.10")]))
  }
  
  x$treatment2 <- as.factor(x$treatment2)
  
  
  # Run regressions
  
 # res_forest <- plm(perc_for ~ treatment2 + income + literacy + sanitation + gini + population,
  #                  data = x, weights = weights, effect = "twoways", index = c("CU_ID_50M", "Year"), model = "within")
  
#  summary(res_forest)   # Matches. Protection leads to an increase in forest cover! 
  
 
  ## Use pFtest to check whether a fixed effects model is better than OLS. 
  
 # pooled_forest <- plm(perc_for ~ treatment2 + income + literacy + sanitation + gini + population,
  #                     data = x, weights = weights, index = c("CU_ID_50M", "Year"), model = "pooling")
  
  
#  v <- pFtest(res_forest, pooled_forest)            # Fixed effects model is better
#  test_forest <- v$p.value
  
  ## Try using fixest package instead
  
  res_forest<- feols(perc_for ~ treatment2 + income + literacy + sanitation + gini + population + mines| CU_ID_50M + Year, data = x,
                      weights = x$weights, panel.id = ~ CU_ID_50M + Year)
  summary(res_forest, vcov = "hetero")
  
  bp <- print(bptest(perc_for ~ treatment2 + income + literacy + sanitation + gini + population + mines + factor(CU_ID_50M), data = x, studentize=F))
  bp <- bp$p.value

  y <- coeftable(res_forest, vcov = "hetero")
  y <- as.data.frame(t(y))
  y <- as.data.frame(y[c(1,2,4), 1])
  confints <- as.data.frame(t(confint(res_forest, vcov = "hetero")[1,]))
  names(y) <- control
  names(confints) <- control
  y <- rbind(y, bp, confints)
  row.names(y) <- c("coefficient", "Std Error", "p-value", "bp_test", "lower_CI", "upper_CI")
  
  y$Outcome <- "Forest"
  y$PA_type <- PA_cat
  
  Values <- y
  
  
  ####
  
#  res_income <- plm(income ~ treatment2 + perc_for + literacy + sanitation + gini + population + mines,
       #             data = x, weights = weights, effect = "twoways", index = c("CU_ID_50M", "Year"), model = "within") 
#  summary(res_income)    # Matches original results
  
#  pooled_income <- plm(income ~ treatment2 + perc_for + literacy + sanitation + gini + population + mines,
      #                 data = x, weights = weights, index = c("CU_ID_50M", "Year"), model = "pooling") 
  
  #  v <- pFtest(res_income, pooled_income)    # Fixed effects model is better
  #  test_income <- v$p.value
  
  
  res_income <- feols(income ~ treatment2 + perc_for + literacy + sanitation + gini + population + mines| CU_ID_50M + Year, data = x,
                weights = x$weights, panel.id = ~ CU_ID_50M + Year)
  summary(res_income, vcov = "hetero")
  
  bp <- print(bptest(income ~ treatment2 + perc_for + literacy + sanitation + gini + population + mines + factor(CU_ID_50M), data = x, studentize=F))
  bp <- bp$p.value
  
  y <- coeftable(res_income, vcov = "hetero")
  y <- as.data.frame(t(y))
  y <- as.data.frame(y[c(1,2,4), 1])
  confints <- as.data.frame(t(confint(res_income, vcov = "hetero")[1,]))
  names(y) <- control
  names(confints) <- control
  y <- rbind(y, bp, confints)
  row.names(y) <- c("coefficient", "Std Error", "p-value", "bp_test", "lower_CI", "upper_CI")

  y$Outcome <- "Income"
  y$PA_type <- PA_cat
  
  Values <- rbind(Values, y)

 
  #############
  
#  res_literacy <- plm(literacy ~ treatment2 + perc_for + income + sanitation + gini + population + mines,
#                      data = x, weights = weights, effect = "twoways", index = c("CU_ID_50M", "Year"), model = "within") 
#  summary(res_literacy)   # Differs
  
#  pooled_literacy <- plm(literacy ~ treatment2 + perc_for + income + sanitation + gini + population + mines,
#                         data = x, weights = weights, index = c("CU_ID_50M", "Year"), model = "pooling")
  
  res_literacy <- feols(literacy ~ treatment2 + perc_for + income + sanitation + gini + population + mines| CU_ID_50M + Year, data = x,
                weights = x$weights, panel.id = ~ CU_ID_50M + Year)
  summary(res_literacy, vcov = "hetero")
  
  
#  v <- pFtest(res_literacy, pooled_literacy)  # Fixed effects model is better
#  test_literacy <- v$p.value
  
  bp <- print(bptest(literacy ~ treatment2 + perc_for + income + sanitation + gini + population + mines + factor(CU_ID_50M), data = x, studentize=F))
  bp <- bp$p.value
  
  y <- coeftable(res_literacy, vcov = "hetero")
  y <- as.data.frame(t(y))
  y <- as.data.frame(y[c(1,2,4), 1])
  confints <- as.data.frame(t(confint(res_literacy, vcov = "hetero")[1,]))
  names(y) <- control
  names(confints) <- control
  y <- rbind(y, bp, confints)
  row.names(y) <- c("coefficient", "Std Error", "p-value", "bp_test", "lower_CI", "upper_CI")
  
  y$Outcome <- "Literacy"
  y$PA_type <- PA_cat
  
  Values <- rbind(Values, y)
 
  
  ########
  
  
#  res_sanitation <- plm(sanitation ~ treatment2 + perc_for + income + literacy + gini + population + mines,
#                        data = x, weights = weights, effect = "twoways", index = c("CU_ID_50M", "Year"), model = "within") 
#  summary(res_sanitation)   # Matches
  
#  pooled_sanitation <- plm(sanitation ~ treatment2 + perc_for + income + literacy + gini + population + mines,
#                           data = x, weights = weights, effect = "twoways", index = c("CU_ID_50M", "Year"), model = "pooling") 
  
#  v <- pFtest(res_sanitation, pooled_sanitation)  # Fixed effects model is better
# test_sanitation <- v$p.value

  res_sanitation <- feols(sanitation ~ treatment2 + perc_for + income + literacy + gini + population + mines| CU_ID_50M + Year, data = x,
                weights = x$weights, panel.id = ~ CU_ID_50M + Year)
  summary(res_sanitation, vcov = "hetero")
  
  
  bp <- print(bptest(sanitation ~ treatment2 + perc_for + income + literacy + gini + population + mines + factor(CU_ID_50M), data = x, studentize=F))
  bp <- bp$p.value
  
  y <- coeftable(res_sanitation, vcov = "hetero")
  y <- as.data.frame(t(y))
  y <- as.data.frame(y[c(1,2,4), 1])
  confints <- as.data.frame(t(confint(res_sanitation, vcov = "hetero")[1,]))
  names(y) <- control
  names(confints) <- control
  y <- rbind(y, bp, confints)
  row.names(y) <- c("coefficient", "Std Error", "p-value", "bp_test", "lower_CI", "upper_CI")
  
  y$Outcome <- "Sanitation"
  y$PA_type <- PA_cat
  
  Values <- rbind(Values, y)

  #######

#  res_gini <- plm(gini ~ treatment2 + perc_for + income + sanitation + literacy + population + mines,
#                  data = x, weights = weights, effect = "twoways", index = c("CU_ID_50M", "Year"), model = "within") 
#  summary(res_gini)   # Matches to 95% confidence interval
  
#  pooled_gini <- plm(gini ~ treatment2 + perc_for + income + sanitation + literacy + population + mines,
#                     data = x, weights = weights, effect = "twoways", index = c("CU_ID_50M", "Year"), model = "pooling") 
  
# v <- pFtest(res_gini, pooled_gini)  # Fixed effects model is better
# test_gini <- v$p.value

  
  res_gini <- feols(gini ~ treatment2 + perc_for + income + sanitation + literacy + population + mines| CU_ID_50M + Year, data = x,
                weights = x$weights, panel.id = ~ CU_ID_50M + Year)
  summary(res_gini, vcov = "hetero")
  
  bp <- print(bptest(gini ~ treatment2 + perc_for + income + sanitation + literacy + population + mines + factor(CU_ID_50M), data = x, studentize=F))
  bp <- bp$p.value
  
  y <- coeftable(res_gini, vcov = "hetero")
  y <- as.data.frame(t(y))
  y <- as.data.frame(y[c(1,2,4), 1])
  confints <- as.data.frame(t(confint(res_gini, vcov = "hetero")[1,]))
  names(y) <- control
  names(confints) <- control
  y <- rbind(y, bp, confints)
  row.names(y) <- c("coefficient", "Std Error", "p-value", "bp_test", "lower_CI", "upper_CI")
  
  y$Outcome <- "Gini"
  y$PA_type <- PA_cat
  
  Values <- rbind(Values, y)
  
  return(Values)
}


Res_SUPA_large_landholders <- func1(large_landholders, "SUPA", "large_landholders")

Res_SUPA_medium_landholders <- func1(medium_landholders, "SUPA", "medium_landholders")

Res_SUPA_small_landholders <- func1(small_landholders, "SUPA", "small_landholders")

Res_SUPA_sparselypopulated <- func1(sparselypopulated, "SUPA", "sparsely_populated")

Res_SUPA_verysmall_landholders <- func1(verysmall_landholders, "SUPA", "very_small_landholders")


# Create second function without the mining predictor for PAs to mining and nonpa comparisons.

func_nonmining <- function(x, PA_cat, control){
  x <- subset(x, select = -c(deforest.10, deforest.00)) # Remove unwanted defor columns
  
  # Reformat data to panel format 
  
  x <- x %>% slice(rep(1:n(), each = 2))                # Duplicate each observation            
  x$Year <- rep(c("2000", "2010"), length(unique(x$CU_ID_50M)))      # Add column for Year.
  x$perc_for <- 0                                       # Add column for each predictor which the transposed data will go into.
  x$income <- 0
  x$literacy <- 0
  x$sanitation <- 0
  x$gini <- 0
  x$treatment2 <- 0
  x$population <- 0
  x$mines <- 0
  index <- which(x$Year == "2000")
  
  # For perc_for
  
  for(i in index){
    x$perc_for[i:nrow(x)] <- as.numeric(t(x[i, c("forest.00_alternative", "forest.10_alternative")]))
  }
  
  # For income
  
  for(i in index){
    x$income[i:nrow(x)] <- as.numeric(t(x[i, c("income_infl.00", "income.10")]))
  }
  
  # For literacy
  
  for(i in index){
    x$literacy[i:nrow(x)] <- as.numeric(t(x[i, c("literacy.00", "literacy.10")]))
  }
  
  # For sanitation
  
  for(i in index){
    x$sanitation[i:nrow(x)] <- as.numeric(t(x[i, c("sanitation.00", "sanitation.10")]))
  }
  
  # For gini
  
  for(i in index){
    x$gini[i:nrow(x)] <- as.numeric(t(x[i, c("gini.00", "gini.10")]))
  }
  
  # For treatment
  
  x$treatment2[x$treatment == 1 & x$Year == "2010"] <- 1 
  
  # Check it is correct
  
  print(which(x$treatment == 0 & x$treatment2 == 1))
  print(which(x$treatment == 1 & x$Year== "2000" & x$treatment2 ==1))
  
  # For population
  
  for(i in index){
    x$population[i:nrow(x)] <- as.numeric(t(x[i, c("population.00", "population.10")]))
  }
  
  # For mines
  
  for(i in index){
    x$mines[i:nrow(x)] <- as.numeric(t(x[i, c("mines.00", "mines.10")]))
  }
  
  x$treatment2 <- as.factor(x$treatment2)
  
  
  # Run regressions
  
  # res_forest <- plm(perc_for ~ treatment2 + income + literacy + sanitation + gini + population,
  #                  data = x, weights = weights, effect = "twoways", index = c("CU_ID_50M", "Year"), model = "within")
  
  #  summary(res_forest)   # Matches. Protection leads to an increase in forest cover! 
  
  
  ## Use pFtest to check whether a fixed effects model is better than OLS. 
  
  # pooled_forest <- plm(perc_for ~ treatment2 + income + literacy + sanitation + gini + population,
  #                     data = x, weights = weights, index = c("CU_ID_50M", "Year"), model = "pooling")
  
  
  #  v <- pFtest(res_forest, pooled_forest)            # Fixed effects model is better
  #  test_forest <- v$p.value
  
  ## Try using fixest package instead
  
  res_forest<- feols(perc_for ~ treatment2 + income + literacy + sanitation + gini + population| CU_ID_50M + Year, data = x,
                     weights = x$weights, panel.id = ~ CU_ID_50M + Year)
  summary(res_forest, vcov = "hetero")
  
  bp <- print(bptest(perc_for ~ treatment2 + income + literacy + sanitation + gini + population + factor(CU_ID_50M), data = x, studentize=F))
  bp <- bp$p.value
  
  y <- coeftable(res_forest, vcov = "hetero")
  y <- as.data.frame(t(y))
  y <- as.data.frame(y[c(1,2,4), 1])
  confints <- as.data.frame(t(confint(res_forest, vcov = "hetero")[1,]))
  names(y) <- control
  names(confints) <- control
  y <- rbind(y, bp, confints)
  row.names(y) <- c("coefficient", "Std Error", "p-value", "bp_test", "lower_CI", "upper_CI")
  
  y$Outcome <- "Forest"
  y$PA_type <- PA_cat
  
  Values <- y
  
  
  ####
  
  #  res_income <- plm(income ~ treatment2 + perc_for + literacy + sanitation + gini + population,
  #             data = x, weights = weights, effect = "twoways", index = c("CU_ID_50M", "Year"), model = "within") 
  #  summary(res_income)    # Matches original results
  
  #  pooled_income <- plm(income ~ treatment2 + perc_for + literacy + sanitation + gini + population,
  #                 data = x, weights = weights, index = c("CU_ID_50M", "Year"), model = "pooling") 
  
  #  v <- pFtest(res_income, pooled_income)    # Fixed effects model is better
  #  test_income <- v$p.value
  
  
  res_income <- feols(income ~ treatment2 + perc_for + literacy + sanitation + gini + population| CU_ID_50M + Year, data = x,
                      weights = x$weights, panel.id = ~ CU_ID_50M + Year)
  summary(res_income, vcov = "hetero")
  
  bp <- print(bptest(income ~ treatment2 + perc_for + literacy + sanitation + gini + population + factor(CU_ID_50M), data = x, studentize=F))
  bp <- bp$p.value
  
  y <- coeftable(res_income, vcov = "hetero")
  y <- as.data.frame(t(y))
  y <- as.data.frame(y[c(1,2,4), 1])
  confints <- as.data.frame(t(confint(res_income, vcov = "hetero")[1,]))
  names(y) <- control
  names(confints) <- control
  y <- rbind(y, bp, confints)
  row.names(y) <- c("coefficient", "Std Error", "p-value", "bp_test", "lower_CI", "upper_CI")
  
  y$Outcome <- "Income"
  y$PA_type <- PA_cat
  
  Values <- rbind(Values, y)
  
  
  #############
  
  #  res_literacy <- plm(literacy ~ treatment2 + perc_for + income + sanitation + gini + population,
  #                      data = x, weights = weights, effect = "twoways", index = c("CU_ID_50M", "Year"), model = "within") 
  #  summary(res_literacy)   # Differs
  
  #  pooled_literacy <- plm(literacy ~ treatment2 + perc_for + income + sanitation + gini + population,
  #                         data = x, weights = weights, index = c("CU_ID_50M", "Year"), model = "pooling")
  
  res_literacy <- feols(literacy ~ treatment2 + perc_for + income + sanitation + gini + population | CU_ID_50M + Year, data = x,
                        weights = x$weights, panel.id = ~ CU_ID_50M + Year)
  summary(res_literacy, vcov = "hetero")
  
  
  #  v <- pFtest(res_literacy, pooled_literacy)  # Fixed effects model is better
  #  test_literacy <- v$p.value
  
  bp <- print(bptest(literacy ~ treatment2 + perc_for + income + sanitation + gini + population + factor(CU_ID_50M), data = x, studentize=F))
  bp <- bp$p.value
  
  y <- coeftable(res_literacy, vcov = "hetero")
  y <- as.data.frame(t(y))
  y <- as.data.frame(y[c(1,2,4), 1])
  confints <- as.data.frame(t(confint(res_literacy, vcov = "hetero")[1,]))
  names(y) <- control
  names(confints) <- control
  y <- rbind(y, bp, confints)
  row.names(y) <- c("coefficient", "Std Error", "p-value", "bp_test", "lower_CI", "upper_CI")
  
  y$Outcome <- "Literacy"
  y$PA_type <- PA_cat
  
  Values <- rbind(Values, y)
  
  
  ########
  
  
  #  res_sanitation <- plm(sanitation ~ treatment2 + perc_for + income + literacy + gini + population,
  #                        data = x, weights = weights, effect = "twoways", index = c("CU_ID_50M", "Year"), model = "within") 
  #  summary(res_sanitation)   # Matches
  
  #  pooled_sanitation <- plm(sanitation ~ treatment2 + perc_for + income + literacy + gini + population,
  #                           data = x, weights = weights, effect = "twoways", index = c("CU_ID_50M", "Year"), model = "pooling") 
  
  #  v <- pFtest(res_sanitation, pooled_sanitation)  # Fixed effects model is better
  # test_sanitation <- v$p.value
  
  res_sanitation <- feols(sanitation ~ treatment2 + perc_for + income + literacy + gini + population | CU_ID_50M + Year, data = x,
                          weights = x$weights, panel.id = ~ CU_ID_50M + Year)
  summary(res_sanitation, vcov = "hetero")
  
  
  bp <- print(bptest(sanitation ~ treatment2 + perc_for + income + literacy + gini + population + factor(CU_ID_50M), data = x, studentize=F))
  bp <- bp$p.value
  
  y <- coeftable(res_sanitation, vcov = "hetero")
  y <- as.data.frame(t(y))
  y <- as.data.frame(y[c(1,2,4), 1])
  confints <- as.data.frame(t(confint(res_sanitation, vcov = "hetero")[1,]))
  names(y) <- control
  names(confints) <- control
  y <- rbind(y, bp, confints)
  row.names(y) <- c("coefficient", "Std Error", "p-value", "bp_test", "lower_CI", "upper_CI")
  
  y$Outcome <- "Sanitation"
  y$PA_type <- PA_cat
  
  Values <- rbind(Values, y)
  
  #######
  
  #  res_gini <- plm(gini ~ treatment2 + perc_for + income + sanitation + literacy + population,
  #                  data = x, weights = weights, effect = "twoways", index = c("CU_ID_50M", "Year"), model = "within") 
  #  summary(res_gini)   # Matches to 95% confidence interval
  
  #  pooled_gini <- plm(gini ~ treatment2 + perc_for + income + sanitation + literacy + population,
  #                     data = x, weights = weights, effect = "twoways", index = c("CU_ID_50M", "Year"), model = "pooling") 
  
  # v <- pFtest(res_gini, pooled_gini)  # Fixed effects model is better
  # test_gini <- v$p.value
  
  
  res_gini <- feols(gini ~ treatment2 + perc_for + income + sanitation + literacy + population| CU_ID_50M + Year, data = x,
                    weights = x$weights, panel.id = ~ CU_ID_50M + Year)
  summary(res_gini, vcov = "hetero")
  
  bp <- print(bptest(gini ~ treatment2 + perc_for + income + sanitation + literacy + population + factor(CU_ID_50M), data = x, studentize=F))
  bp <- bp$p.value
  
  y <- coeftable(res_gini, vcov = "hetero")
  y <- as.data.frame(t(y))
  y <- as.data.frame(y[c(1,2,4), 1])
  confints <- as.data.frame(t(confint(res_gini, vcov = "hetero")[1,]))
  names(y) <- control
  names(confints) <- control
  y <- rbind(y, bp, confints)
  row.names(y) <- c("coefficient", "Std Error", "p-value", "bp_test", "lower_CI", "upper_CI")
  
  y$Outcome <- "Gini"
  y$PA_type <- PA_cat
  
  Values <- rbind(Values, y)
  
  return(Values)
}

Res_SUPA_nonpa <- func_nonmining(nonpa, "SUPA", "nonpa")

Res_SUPA_mining <- func_nonmining(mining, "SUPA", "mining")

SUPA_values <- cbind(Res_SUPA_nonpa, Res_SUPA_sparselypopulated, Res_SUPA_verysmall_landholders, Res_SUPA_small_landholders, 
                      Res_SUPA_medium_landholders, Res_SUPA_large_landholders, Res_SUPA_mining)


write.csv(SUPA_values, "Outputs/All_res_SUPA_updated_12_23.csv")


# Join all values datasets together

All_values <- rbind(SPA_values, SUPA_values, IT_values)

All_values <- subset(All_values, select = -c(2,3,5,6,8,9,11,12,14,15,17,18))

write.csv(All_values, "Outputs/All_values_updated_12_23.csv")

All_values$x <- rep(c("coefficient", "std_error", "pvalue", "bp_test", "lower_CI", "upper_CI"), 15)

All_values <- All_values[, c(10, 9,8, 1:7)]

pvalues <- filter(All_values, x == "pvalue")
pvalues <- gather(pvalues, "Comparison", "pvalue", 4:10)

# Adjust p-values

Orig_pvalues <- arrange(pvalues, pvalue)   # Sort lowest to highest

Adj_pvalues <- p.adjust(Orig_pvalues$pvalue ,method = "BH")

Orig_pvalues$pvalue <- Adj_pvalues         # Put adjusted values back in.

New_pvalues <- Orig_pvalues

# Put in same order as All_values_adj so new_pvalues can be reinserted.

New_pvalues$PA_type <- factor(New_pvalues$PA_type, levels = c("SPA", "SUPA", "IT"))

New_pvalues$Outcome <- factor(New_pvalues$Outcome, levels = c("Forest", "Income", "Literacy", "Sanitation", "Gini"))

New_pvalues$Comparison <- factor(New_pvalues$Comparison, levels = c("nonpa", "sparsely_populated", "very_small_landholders", "small_landholders",
                                                                    "medium_landholders", "large_landholders", "mining"))
New_pvalues <- arrange(New_pvalues, PA_type, Outcome, Comparison)


# change the data to wide format so it can be inserted back into original dataframe

New_pvalues <- spread(New_pvalues, Comparison, pvalue)

index <- which(All_values$x == "pvalue")

All_values_adj <- All_values

All_values_adj[index, 4:10] <- New_pvalues[, 4:10]

write.csv(All_values_adj, "Outputs/Final_results_adj_pvalues_updated_01_24.csv")

filter(All_values_adj, PA_type == "SUPA", Outcome == "Sanitation")
filter(All_values_adj, PA_type == "SUPA", Outcome == "Sanitation")


## TOMORROW - check results in All_ values_adj align with All_res_... results. They do.
## Make coefficient plots

All_forest <- subset(All_values_adj, Outcome == "Forest")
All_forest <- gather(All_forest, "Comparison", "Value", 4:10)
All_forest <- spread(All_forest, "x", "Value")
All_forest$Comparison <- factor(All_forest$Comparison, levels = c("nonpa", "sparsely_populated", "very_small_landholders",
                                                                  "small_landholders", "medium_landholders", "large_landholders", "mining"))
All_forest$PA_type <- factor(All_forest$PA_type, levels = c("SPA","SUPA", "IT"))
                                                
All_forest <- arrange(All_forest, Comparison, PA_type)
All_forest$Index <- as.numeric(row.names(All_forest))                                                                                              


# Forest

ggplot(data = All_forest)+
  scale_color_manual(values = c("#425E54","#76AD8E","#BCE8CA"))+
  geom_point(aes(x = factor(Index), y = coefficient, color = PA_type), size = 1)+
  geom_errorbar(aes(x = factor(Index), ymin = lower_CI, ymax = upper_CI, color = PA_type), size = 0.75)+
  scale_x_discrete(breaks = seq(2, 20, 3), labels = c("nonpa", "sparsely populated", "     very small",
                                                           "small", "medium", "large", "mining")) +
  geom_hline(yintercept = 0, linetype = "dashed")+
  scale_y_continuous(breaks = seq(-3, 18, 3))+
 # ggtitle("Forest")+
  theme_bw()+
  labs(x = "Control land cover", y = "Coefficient")+
  theme(axis.text.x = element_text(colour = "black", size = 6, vjust = 1),
        axis.text.y = element_text(colour = "black", size = 6),
        axis.title.x = element_text(vjust = 0.2, size = 7),
        axis.title.y = element_text(size = 7),
        legend.position = "none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()) 


# Income

All_income <- subset(All_values_adj, Outcome == "Income")
All_income <- gather(All_income, "Comparison", "Value", 4:10)
All_income <- spread(All_income, "x", "Value")
All_income$Comparison <- factor(All_income$Comparison, levels = c("nonpa", "sparsely_populated", "very_small_landholders",
                                                                  "small_landholders", "medium_landholders", "large_landholders", "mining"))
All_income$PA_type <- factor(All_income$PA_type, levels = c("SPA","SUPA", "IT"))

All_income <- arrange(All_income, Comparison, PA_type)
All_income$Index <- as.numeric(row.names(All_income))    

ggplot(data = All_income)+
  scale_color_manual(values = c("#803100","#BF4900","#FFA970"))+
  geom_point(aes(x = factor(Index), y = coefficient, color = PA_type), size = 1)+
  geom_errorbar(aes(x = factor(Index), ymin = lower_CI, ymax = upper_CI, color = PA_type), size = 0.75)+
  scale_x_discrete(breaks = seq(2, 20, 3), labels = c("nonpa", "sparsely populated", "     very small",
                                                      "small", "medium", "large", "mining")) +
  geom_hline(yintercept = 0, linetype = "dashed")+
  scale_y_continuous(breaks = seq(-350, 850, 150))+
 # ggtitle("Income")+
  theme_bw()+
  labs(x = "Control land cover", y = "Coefficient")+
  theme(axis.text.x = element_text(colour = "black", size = 6, vjust = 1),
        axis.text.y = element_text(colour = "black", size = 6),
        axis.title.x = element_text(vjust = 0.2, size = 7),
        axis.title.y = element_text(size = 7),
        legend.position = "none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())


# Sanitation

All_sanitation <- subset(All_values_adj, Outcome == "Sanitation")
All_sanitation <- gather(All_sanitation, "Comparison", "Value", 4:10)
All_sanitation <- spread(All_sanitation, "x", "Value")
All_sanitation$Comparison <- factor(All_sanitation$Comparison, levels = c("nonpa", "sparsely_populated", "very_small_landholders",
                                                                          "small_landholders", "medium_landholders", "large_landholders", "mining"))
All_sanitation$PA_type <- factor(All_sanitation$PA_type, levels = c("SPA","SUPA", "IT"))


All_sanitation <- arrange(All_sanitation, Comparison, PA_type)
All_sanitation$Index <- as.numeric(row.names(All_sanitation))    

options(scipen = 0)

ggplot(data = All_sanitation)+
  scale_color_manual(values = c("#1853DB","#7398F5","#C7D6FF"))+
  geom_point(aes(x = factor(Index), y = coefficient, color = PA_type), size = 1)+
  geom_errorbar(aes(x = factor(Index), ymin = lower_CI, ymax = upper_CI, color = PA_type), size = 0.75)+
  scale_x_discrete(breaks = seq(2, 20, 3), labels = c("nonpa", "sparsely populated", "     very small",
                                                      "small", "medium", "large", "mining")) +
  geom_hline(yintercept = 0, linetype = "dashed")+
  scale_y_continuous(breaks = seq(-0.8, 0.5, 0.1))+
#  ggtitle("Sanitation")+
  theme_bw()+
  labs(x = "Control land cover", y = "Coefficient")+
  theme(axis.text.x = element_text(colour = "black", size = 6, vjust = 1),
        axis.text.y = element_text(colour = "black", size = 6),
        axis.title.x = element_text(vjust = 0.2, size = 7),
        axis.title.y = element_text(size = 7),
        legend.position = "none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())


# Literacy

All_literacy <- subset(All_values_adj, Outcome == "Literacy")
All_literacy <- gather(All_literacy, "Comparison", "Value", 4:10)
All_literacy <- spread(All_literacy, "x", "Value")
All_literacy$Comparison <- factor(All_literacy$Comparison, levels = c("nonpa", "sparsely_populated", "very_small_landholders",
                                                                      "small_landholders", "medium_landholders", "large_landholders", "mining"))
All_literacy$PA_type <- factor(All_literacy$PA_type, levels = c("SPA","SUPA", "IT"))


All_literacy <- arrange(All_literacy, Comparison, PA_type)
All_literacy$Index <- as.numeric(row.names(All_literacy))    

options(scipen = 0)

ggplot(data = All_literacy)+
  scale_color_manual(values = c("#8C1850","#DC267F","#FFADC9"))+
  geom_point(aes(x = factor(Index), y = coefficient, color = PA_type), size = 1)+
  geom_errorbar(aes(x = factor(Index), ymin = lower_CI, ymax = upper_CI, color = PA_type), size = 0.75)+
  scale_x_discrete(breaks = seq(2, 20, 3), labels = c("nonpa", "sparsely populated", "very small",
                                                      "small", "medium", "large", "mining")) +
  geom_hline(yintercept = 0, linetype = "dashed")+
  scale_y_continuous(breaks = seq(-0.5, 0.3, 0.1))+
#  ggtitle("Literacy")+
  theme_bw()+
  labs(x = "Control land cover", y = "Coefficient")+
  theme(axis.text.x = element_text(colour = "black", size = 6, vjust = 1),
        axis.text.y = element_text(colour = "black", size = 6),
        axis.title.x = element_text(vjust = 0.2, size = 7),
        axis.title.y = element_text(size = 7),
        legend.position = "none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())


# Gini

All_gini <- subset(All_values_adj, Outcome == "Gini")
All_gini <- gather(All_gini, "Comparison", "Value", 4:10)
All_gini <- spread(All_gini, "x", "Value")
All_gini$Comparison <- factor(All_gini$Comparison, levels = c("nonpa", "sparsely_populated", "very_small_landholders",
                                                                       "small_landholders", "medium_landholders", "large_landholders", "mining"))
All_gini$PA_type <- factor(All_gini$PA_type, levels = c("SPA","SUPA", "IT"))

All_gini <- arrange(All_gini, Comparison, PA_type)
All_gini$Index <- as.numeric(row.names(All_gini))    

options(scipen = 0)

ggplot(data = All_gini)+
  scale_color_manual(values = c("#75640D","#BFAB43","#E6D88A"))+
  geom_point(aes(x = factor(Index), y = coefficient, color = PA_type), size = 1)+
  geom_errorbar(aes(x = factor(Index), ymin = lower_CI, ymax = upper_CI, color = PA_type), size = 0.75)+
  scale_x_discrete(breaks = seq(2, 20, 3), labels = c("nonpa", "sparsely populated", "very small",
                                                      "small", "medium", "large", "mining")) +
  geom_hline(yintercept = 0, linetype = "dashed")+
  scale_y_continuous(breaks = seq(-0.4, 0.4, 0.1))+
#  ggtitle("Gini")+
  theme_bw()+
  labs(x = "Control land cover", y = "Coefficient")+
  theme(axis.text.x = element_text(colour = "black", size = 6, vjust = 1),
        axis.text.y = element_text(colour = "black", size = 6),
        axis.title.x = element_text(vjust = 0.2, size = 7),
        axis.title.y = element_text(size = 7),
        legend.position = "none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())





